home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Games of Daze
/
Infomagic - Games of Daze (Summer 1995) (Disc 1 of 2).iso
/
x2ftp
/
msdos
/
pmode
/
exc_dx02
/
uasm.c
< prev
next >
Wrap
C/C++ Source or Header
|
1994-08-03
|
20KB
|
715 lines
/* This is the exc-32 Debugger disassembler. */
/*
MMURTL Operating System Source Code
Copyright 1991,1992,1993, Richard A. Burgess
ALL RIGHTS RESERVED
Version x0.8
*/
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include "uasm.h"
/* Handy type defines */
#define U32 unsigned long
#define U16 unsigned short
#define U8 unsigned char
#define S32 signed long
#define S16 signed short
#define S8 signed char
#define SEGSIZE 32
/* Protypes */
U8 getbyte (void);
U8 modrm (void);
U8 sib (void);
int bytes (char c);
void ohex (char c, int extend, int optional, int defsize);
void reg_name(U8 which, char size);
void escape (char c, char t);
void decode (char *s);
void do_sib (int m);
void do_modrm(char t);
/*
The Intel 386 DX Programmer's Reference Manual provides a table that
uses the following codes to assist in disassembly of 386 code (page A-3).
The letters below are the same as the codes in the manual. The ~ (tilde)
is an escape character to signify expansion of the codes is
required when the string is being output.
Tilde tokens in strings:
First char after '~':
A - Direct address
C - Reg of R/M picks control register
D - Reg of R/M picks debug register
E - R/M picks operand
F - Flag register
G - Reg of R/M selects a general register
I - Immediate data
J - Relative IP offset
M - R/M picks memory
O - No R/M, offset only
R - Mod of R/M picks register only
S - Reg of R/M picks segment register
T - reg of R/M picks test register
X - DS:ESI
Y - ES:EDI
2 - prefix of two-byte opcode
e - put in 'e' if use32 (second char is part of reg name)
put in 'w' for use16 or 'd' for use32 (second char is 'w')
f - Floating point (second char is esc value)
g - do R/M group 'n'
p - prefix
s - Size override (second char is a,o)
Second char after '~':
a - Two words in memory (BOUND)
b - Byte
c - Byte or word
d - DWord
p - 32 or 48 bit pointer
s - Six byte pseudo-descriptor
v - Word or DWord
w - Word
1-8 - group number, esc value, etc
*/
char *opmap1[] = {
/* 0 */
"ADD ~Eb,~Gb", "ADD ~Ev,~Gv", "ADD ~Gb,~Eb", "ADD ~Gv,~Ev",
"ADD AL,~Ib", "ADD ~eAX,~Iv", "PUSH ES", "POP ES",
"OR ~Eb,~Gb", "OR ~Ev,~Gv", "OR ~Gb,~Eb", "OR ~Gv,~Ev",
"OR AL,~Ib", "OR ~eAX,~Iv", "PUSH CS", "~2 ",
/* 1 */
"ADC ~Eb,~Gb", "ADC ~Ev,~Gv", "ADC ~Gb,~Eb", "ADC ~Gv,~Ev",
"ADC AL,~Ib", "ADC ~eAX,~Iv", "PUSH SS", "POP SS",
"SBB ~Eb,~Gb", "SBB ~Ev,~Gv", "SBB ~Gb,~Eb", "SBB ~Gv,~Ev",
"SBB AL,~Ib", "SBB ~eAX,~Iv", "PUSH DS", "POP DS",
/* 2 */
"AND ~Eb,~Gb", "AND ~Ev,~Gv", "AND ~Gb,~Eb", "AND ~Gv,~Ev",
"AND AL,~Ib", "AND ~eAX,~Iv", "~pE", "DAA",
"SUB ~Eb,~Gb", "SUB ~Ev,~Gv", "SUB ~Gb,~Eb", "SUB ~Gv,~Ev",
"SUB AL,~Ib", "SUB ~eAX,~Iv", "~pC", "DAS",
/* 3 */
"XOR ~Eb,~Gb", "XOR ~Ev,~Gv", "XOR ~Gb,~Eb", "XOR ~Gv,~Ev",
"XOR AL,~Ib", "XOR ~eAX,~Iv", "~pS", "AAA",
"CMP ~Eb,~Gb", "CMP ~Ev,~Gv", "CMP ~Gb,~Eb", "CMP ~Gv,~Ev",
"CMP AL,~Ib", "CMP ~eAX,~Iv", "~pD", "AAS",
/* 4 */
"INC ~eAX", "INC ~eCX", "INC ~eDX", "INC ~eBX",
"INC ~eSP", "INC ~eBP", "INC ~eSI", "INC ~eDI",
"DEC ~eAX", "DEC ~eCX", "DEC ~eDX", "DEC ~eBX",
"DEC ~eSP", "DEC ~eBP", "DEC ~eSI", "DEC ~eDI",
/* 5 */
"PUSH ~eAX", "PUSH ~eCX", "PUSH ~eDX", "PUSH ~eBX",
"PUSH ~eSP", "PUSH ~eBP", "PUSH ~eSI", "PUSH ~eDI",
"POP ~eAX", "POP ~eCX", "POP ~eDX", "POP ~eBX",
"POP ~eSP", "POP ~eBP", "POP ~eSI", "POP ~eDI",
/* 6 */
"PUSHA", "POPA", "BOUND ~Gv,~Ma", "ARPL ~Ew,~Rw",
"~pF", "~pG", "~so", "~sa",
"PUSH ~Iv", "IMUL ~Gv,~Ev*~Iv", "PUSH ~Ib", "IMUL ~Gv,~Ev*~Ib",
// !! ^was = !! ^was =
"INSB ~Yb,DX", "INS~ew ~Yv,DX", "OUTSB DX,~Xb", "OUTS~ew DX,~Xv",
/* 7 */
"JO ~Jb", "JNO ~Jb", "JNC ~Jb", "JC ~Jb",
"JZ ~Jb", "JNZ ~Jb", "JBE ~Jb", "JNBE ~Jb",
"JS ~Jb", "JNS ~Jb", "JPE ~Jb", "JPO ~Jb",
"JL ~Jb", "JGE ~Jb", "JLE ~Jb", "JG ~Jb",
/* 8 */
"~g1 ~Eb,~Ib", "~g1 ~Ev,~Iv", "MOV AL,~Ib", "~g1 ~Ev,~Ib",
"TEST ~Eb,~Gb", "TEST ~Ev,~Gv", "XCHG ~Eb,~Gb", "XCHG ~Ev,~Gv",
"MOV ~Eb,~Gb", "MOV ~Ev,~Gv", "MOV ~Gb,~Eb", "MOV ~Gv,~Ev",
"MOV ~Ew,~Sw", "LEA ~Gv,~M ", "MOV ~Sw,~Ew", "POP ~Ev",
/* 9 */
"NOP", "XCHG ~eAX,~eCX", "XCHG ~eAX,~eDX", "XCHG ~eAX,~eBX",
"XCHG ~eAX,~eSP", "XCHG ~eAX,~eBP", "XCHG ~eAX,~eSI", "XCHG ~eAX,~eDI",
"CBW", "CDW", "CALL ~Ap", "FWAIT",
"PUSH ~eflags", "POP ~eflags", "SAHF", "LAHF",
/* a */
"MOV AL,~Ov", "MOV ~eAX,~Ov", "MOV ~Ov,al", "MOV ~Ov,~eAX",
"MOVSB ~Xb,~Yb", "MOVS~ew ~Xv,~Yv", "CMPSB ~Xb,~Yb", "CMPS~ew ~Xv,~Yv",
"TEST AL,~Ib", "TEST ~eAX,~Iv", "STOSB ~Yb,AL", "STOS~ew ~Yv,~eAX",
"LODSB AL,~Xb", "LODS~ew ~eAX,~Xv", "SCASB AL,~Xb", "SCAS~ew ~eAX,~Xv",
/* b */
"MOV AL,~Ib", "MOV CL,~Ib", "MOV DL,~Ib", "MOV BL,~Ib",
"MOV AH,~Ib", "MOV CH,~Ib", "MOV DH,~Ib", "MOV BH,~Ib",
"MOV ~eAX,~Iv", "MOV ~eCX,~Iv", "MOV ~eDX,~Iv", "MOV ~eBX,~Iv",
"MOV ~eSP,~Iv", "MOV ~eBP,~Iv", "MOV ~eSI,~Iv", "MOV ~eDI,~Iv",
/* c */
"~g2 ~Eb,~Ib", "~g2 ~Ev,~Ib", "RET ~Iw", "RET",
"LES ~Gv,~Mp", "LDS ~Gv,~Mp", "MOV ~Eb,~Ib", "MOV ~Ev,~Iv",
"ENTER ~Iw,~Ib", "LEAVE", "RETF ~Iw", "RETF",
"INT 3", "INT ~Ib", "INTO", "IRETD",
/* d */
"~g2 ~Eb,1", "~g2 ~Ev,1", "~g2 ~Eb,cl", "~g2 ~Ev,cl",
"AAM", "AAD", 0, "XLAT",
/*
"ESC 0,~Ib", "ESC 1,~Ib", "ESC 2,~Ib", "ESC 3,~Ib",
"ESC 4,~Ib", "ESC 5,~Ib", "ESC 6,~Ib", "ESC 7,~Ib",
*/
"~f0", "~f1", "~f2", "~f3",
"~f4", "~f5", "~f6", "~f7",
/* e */
"LOOPNE ~Jb", "LOOPE ~Jb", "LOOP ~Jb", "JCXZ ~Jb",
"IN AL,~Ib", "IN ~eAX,~Ib", "OUT ~Ib,AL", "OUT ~Ib,~eAX",
"CALL ~Jv", "JMP ~Jv", "JMP ~Ap", "JMP ~Jb",
"IN AL,DX", "IN ~eAX,DX", "OUT DX,AL", "OUT DX,~eAX",
/* f */
"LOCK~p ", 0, "REPNE~p ", "REP(e)~p ",
"HLT", "CMC", "~g3", "~g0",
"CLC", "STC", "CLI", "STI",
"CLD", "STD", "~g4", "~g5"
};
char *SecOp00[] = {
/* 0 */
"~g6", "~g7", "LAR ~Gv,~Ew", "LSL ~Gv,~Ew", 0, 0, "CLTS", 0,
0, 0, 0, 0, 0, 0, 0, 0 };
char *SecOp20[] = {
/* 2 */
"MOV ~Rd,~Cd", "MOV ~Rd,~Dd", "MOV ~Cd,~Rd", "MOV ~Dd,~Rd",
"MOV ~Rd,~Td", 0, "MOV ~Td,~Rd", 0,
0, 0, 0, 0, 0, 0, 0, 0};
char *SecOp80[] = {
"JO ~Jv", "JNO ~Jv", "JC ~Jv", "JNC ~Jv",
"JZ ~Jv", "JNZ ~Jv", "JBE ~Jv", "JNBE ~Jv",
"JS ~Jv", "JNS ~Jv", "JPE ~Jv", "JPO ~Jv",
"JL ~Jv", "JGE ~Jv", "JLE ~Jv", "JG ~Jv",
/* 9 */
"SETO ~Eb", "SETNO ~Eb", "SETNC ~Eb", "SETC ~Eb",
"SETZ ~Eb", "SETNZ ~Eb", "SETBE ~Eb", "SETNBE ~Eb",
"SETS ~Eb", "SETNS ~Eb", "SETP ~Eb", "SETNP ~Eb",
"SETL ~Eb", "SETGE ~Eb", "SETLE ~Eb", "SETG ~Eb",
/* a */
"PUSH FS", "POP FS", 0, "BT ~Ev,~Gv",
"SHLD ~Ev,~Gv,~Ib", "SHLD ~Ev,~Gv,cl", 0, 0,
"PUSH GS", "POP GS", 0, "BTS ~Ev,~Gv",
"SHRD ~Ev,~Gv,~Ib", "SHRD ~Ev,~Gv,cl", 0, "IMUL ~Gv,~Ev",
/* b */
0, 0, "LSS ~Mp", "BTR ~Ev,~Gv",
"LFS ~Mp", "LGS ~Mp", "MOVZX ~Gv,~Eb", "MOVZX ~Gv,~Ew",
0, 0, "~g8 ~Ev,~Ib", "BTC ~Ev,~Gv",
"BSF ~Gv,~Ev", "BSR~Gv,~Ev", "MOVSX ~Gv,~Eb", "MOVSX ~Gv,~Ew",
};
/* NOTE: Second byte of 2 byte OpCodes are Invalid if over 0xBF */
char *groups[9][8] = { /* group 0 is group 3 for ~Ev set */
{ "TEST ~Ev,~Iv", "TEST ~Ev,~Iv,", "NOT ~Ev", "NEG ~Ev",
"MUL ~eAX,~Ev", "IMUL ~eAX,~Ev", "DIV ~eAX,~Ev", "IDIV ~eAX,~Ev" },
{ "ADD", "OR", "ADC", "SBB", "AND", "SUB", "XOR", "CMP" },
{ "ROL", "ROR","RCL", "RCR", "SHL", "SHR", "SHL", "SAR" },
{ "TEST ~Eb,~Ib", "TEST ~Eb,~Ib,", "NOT ~Eb", "NEG ~Eb",
"MUL AL,~Eb", "IMUL AL,~Eb", "DIV AL,~Eb", "IDIV AL,~Eb" },
{ "INC ~Eb", "DEC ~Eb", 0, 0, 0, 0, 0, 0 },
{ "INC ~Ev", "DEC ~Ev", "CALL ~Ev", "CALL ~Ep",
"JMP ~Ev", "JMP ~Ep", "PUSH ~Ev", 0 },
{ "SLDT ~Ew", "STR ~Ew", "LLDT ~Ew", "LTR ~Ew",
"VERR ~Ew", "VERW ~Ew", 0, 0 },
{ "SGDT ~Ms", "SIDT ~Ms", "LGDT ~Ms", "LIDT ~Ms",
"SMSW ~Ew", 0, "LMSW ~Ew", 0 },
{ 0, 0, 0, 0, "BT", "BTS", "BTR", "BTC" }
};
/* for display */
char *seg_names[]= {"ES","CS","SS","DS","FS","GS"};
char *breg_names[]={"AL","CL","DL","BL","AH","CH","DH","BH" };
char *wreg_names[]={"AX","CX","DX","BX","SP","BP","SI","DI" };
char *dreg_names[]={"EAX","ECX","EDX","EBX","ESP","EBP","ESI","EDI" };
S16 prefix;
U8 modrmv;
S8 fmodrmv;
U8 sibv;
S8 fsibv;
S16 opsize;
U8 *addrIn;
void *disasm_outAdr;
static char outBuf[200];
static char *outPtr = outBuf;
static void outf(char *text,...)
{
va_list args;
va_start(args,text);
vsprintf (outPtr,text,args);
outPtr = strchr(outBuf,0);
va_end(args);
}
/*****************************************************
Gets a byte to disassemble and update addrIn.
******************************************************/
U8 getbyte(void)
{
return *addrIn++;
}
/*************************************************/
/* Get Mod/RM field byte for current instruction */
U8 modrm(void)
{
if (!fmodrmv)
{
modrmv = getbyte();
fmodrmv = 1;
}
return modrmv;
}
/*******************************************************/
/* Get 'scale-index-base' byte for current instruction */
U8 sib(void)
{
if (!fsibv)
{
sibv = getbyte();
fsibv = 1;
}
return sibv;
}
/**********************************************************/
/* The register is encode as bit 3,4,5 in the byte.
xxRRRxxx
This macro extracts it. Used in several places.
*/
#define reg(a) (((a)>>3)&7)
/*------------------------------------------------------------------------*/
/*------------------------------------------------------------------------*/
/* Determines how many bytes left in the instruction from the
letter in the table (which is passed in here).
*/
int bytes(char c)
{
switch (c)
{
case 'b':
return 1;
case 'w':
return 2;
case 'd':
return 4;
case 'v':
if (opsize == 32)
return 4;
else return 2;
}
return 0;
}
/**************************************************************
Get the correct number of bytes for immediate data from the
code stream and output it as hex.
***************************************************************/
void ohex(char c, int extend, int optional, int defsize)
{
int n, s, i;
long int *x;
unsigned char buf[6];
n=0;
s=0;
x=NULL;
switch (c)
{
case 'a': break;
case 'b': n = 1; /* byte */
break;
case 'w': n = 2; /* word */
break;
case 'd': n = 4; /* dword */
break;
case 's': n = 6; /* fword */
break;
case 'c':
case 'v': if (defsize == 32)
n = 4;
else n = 2;
break;
case 'p': if (defsize == 32) /* 32 or 48 bit pointer */
n = 6;
else n = 4;
s = 1;
break;
}
for (i=0; i<n; i++)
buf[i] = getbyte();
/* sign extend the value into a U32 */
for (; i<extend; i++)
buf[i] = (buf[i-1] & 0x80) ? 0xff : 0;
if (n <= 4) /* !! make displacements negative, GV */
{
x = (long int*)&buf;
if (*x < 0)
{
outf("-%02X",-*x);
return;
}
}
if (s) /* outputs the segment value of FAR pointer */
{
outf("%02X%02X",buf[n-1],buf[n-2]);
n -= 2;
}
if (extend > n)
{
if (!optional)
outf("+");
n = 4;
}
switch (n)
{
case 1: outf("%02X",buf[0]);
break;
case 2: outf("%02X%02X",buf[1],buf[0]);
break;
case 4: outf("%02X%02X%02X%02X",buf[3],buf[2],buf[1],buf[0]);
break;
}
}
/*------------------------------------------------------------------------*/
void reg_name(U8 which, char size)
{
if (size == 'F')
{
outf( "st(%d)",which);
return;
}
if (((size == 'v') && (opsize == 32)) || (size == 'd'))
{
outf("E");
}
if (size == 'b')
{
outf( "%s", breg_names[which]);
}
else
{
outf( "%s", wreg_names[which]);
}
}
/******************************************************************
This takes in two chars that represent part of the op code and
puts out the proper text to match what the letter represents.
c is the first char after the tilde and t is next one. See
opcode1[] strings for what the chars mean.
*******************************************************************/
void escape(char c, char t)
{
S32 delta, vals;
U8 b2;
S8 valsb;
S16 valsw;
switch (c)
{
case 'A': /* Direct Address */
ohex(t,4,0,32);
break;
case 'C': /* Reg of R/M picks control reg */
outf("CR%d",reg(modrm()));
break;
case 'D': /* Reg of R/M pick debug reg */
outf("DR%d",modrm());
break;
case 'E': /* R/M picks operand */
do_modrm(t);
break;
case 'G': /* Reg of R/M picks general reg */
if (t == 'F')
reg_name((modrm()&7),t);
else reg_name(reg(modrm()),t);
break;
case 'I': /* Immediate data */
ohex(t,0,0,opsize);
break;
case 'J': /* Relative IP offset */
switch (bytes(t))
{
case 1:
valsb = getbyte(); /* must remain signed! */
vals = valsb;
break;
case 2:
valsb = getbyte(); /*RAB Made SIGNEd bytes/Words */
valsw = getbyte()<<8;
vals = valsw + valsb;
break;
case 4:
vals = getbyte();
vals |= getbyte() << 8;
vals |= getbyte() << 16;
vals |= getbyte() << 24;
break;
}
delta = (S32) (addrIn + vals);
outf( "%X",delta);
break;
case 'M': /* R/M picks memory */
do_modrm(t);
break;
case 'O': /* NO R/M, Offset only */
decode("~p:[");
ohex(t, 4, 0, 32);
outf("]");
break;
case 'R': /* Mod of R/M pick REG only */
do_modrm(t);
break;
case 'S': /* Reg of R/M picks seg reg */
outf( "%s", seg_names[reg(modrm())]);
break;
case 'T': /* Reg of R/M picks test reg */
outf( "TR%d",modrm());
break;
case 'X': /* DS:ESI */
outf("DS:[ESI]");
break;
case 'Y': /* ES:EDI */
outf("ES:[EDI]");
break;
case '2': /* Prefix of 2 byte opcode */
b2 = getbyte();
if (b2 < 0x10)
decode(SecOp00[b2]);
else if ((b2 > 0x1F) && (b2 < 0x30))
decode(SecOp20[b2-0x20]);
else if ((b2 > 0x7F) && (b2 < 0xC0))
decode(SecOp80[b2-0x80]);
else
outf("<bogus>");
break;
case 'e': /* t is part of reg name */
if (opsize == 32)
{
if (t == 'w') /* put out "d" if t is "w" on 32 bit opsize */
outf("D");
else outf("E%c",t); /* put out "E". if t = "w" then put t */
}
else outf("%c",t);
break;
case 'f': /* floating point */
outf("<Float Op>");
/* floating_point(t-'0'); */
break;
case 'g': /* do R/M group 'n' */
decode(groups[t-'0'][reg(modrm())]);
break;
case 'p': /* Segment prefix */
switch (t)
{
case 'C': /* CS */
case 'D': /* DS */
case 'E': /* ES */
case 'F': /* FS */
case 'G': /* GS */
case 'S': /* SS */
prefix = t;
decode(opmap1[getbyte()]);
break;
case ':':
if (prefix)
outf("%cS:",prefix);
break;
case ' ':
decode(opmap1[getbyte()]);
break;
}
break;
case 's': /* Size override */
if (t=='o') /* o is operand */
{
opsize = 48 - opsize;
decode(opmap1[getbyte()]);
}
break;
}
}
/******************************************
This expands and outputs the instruction
string passed in if it finds the escape
character (tilde).
******************************************/
void decode(char *s)
{
char c;
if (s == 0) /* if NULL pointer, then it's BAD */
outf("<invalid>");
while ((c = *s++) != 0) /* put next char in c */
{
if (c == '~') /* if c is ~ then ESCAPE */
{
c = *s++; /* get letter representing value */
escape(c, *s++);
}
else
if (c == ' ') /* space */
outf(" ");
else outf("%c",c); /* else put out the char found! */
}
}
/* outputs 'scale-index-base' instructions */
void do_sib(int m)
{
int s, i, b;
s = ((sib()) >> 6) & 7; /* SSxxxxxx Scale */
i = ((sib()) >> 3) & 7; /* xxIIIxxx Index */
b = sib() & 7; /* xxxxxBBB Base */
switch (b)
{
case 0: decode("~p:[EAX"); break;
case 1: decode("~p:[ECX"); break;
case 2: decode("~p:[EDX"); break;
case 3: decode("~p:[EBX"); break;
case 4: decode("~p:[ESP"); break;
case 5:
if (m == 0)
{
decode("~p:[");
ohex('d', 4, 0, 32);
}
else decode("~p:[EBP");
break;
case 6: decode("~p:[ESI"); break;
case 7: decode("~p:[EDI"); break;
}
switch (i)
{
case 0: outf("+EAX"); break;
case 1: outf("+ECX"); break;
case 2: outf("+EDX"); break;
case 3: outf("+EBX"); break;
case 4: break;
case 5: outf("+EBP"); break;
case 6: outf("+ESI"); break;
case 7: outf("+EDI"); break;
}
if (i != 4)
switch (s)
{
case 0: break;
case 1: outf("*2"); break;
case 2: outf("*4"); break;
case 3: outf("*8"); break;
}
}
/*------------------------------------------------------------------------*/
void do_modrm(char t)
{
int m;
int r;
m = ((modrm()) >> 6) & 7;
r = modrm() & 7;
if (m == 3)
{
reg_name(r,t);
return;
}
if ((m == 0) && (r == 5))
{
decode("~p:[");
ohex('d',4,0,32);
outf("]");
return;
}
if (r != 4)
decode("~p:[");
switch (r)
{
case 0: outf("EAX"); break;
case 1: outf("ECX"); break;
case 2: outf("EDX"); break;
case 3: outf("EBX"); break;
case 4: do_sib(m); break;
case 5: outf("EBP"); break;
case 6: outf("ESI"); break;
case 7: outf("EDI"); break;
}
switch (m)
{
case 1: ohex('b',4,0,32);
break;
case 2: outf("+");
ohex('v',4,0,32);
break;
}
outf("]");
}
/***********************************************
This disassembles one instruction each time it
is called.
************************************************/
char * disassemble(void * addr)
{
prefix = 0;
fmodrmv = 0;
fsibv = 0;
opsize = SEGSIZE; /* default operand size is DWORD */
addrIn = addr;
outPtr = (char*)&outBuf;
decode(opmap1[getbyte()]); /* decode instruction and output */
disasm_outAdr = (void*)addrIn;
return outBuf;
}